## Figure I.1: Covariate Balance Plots for the Senate and House Analyses in Figure 4

## Instructions ----------------------------------------------------------------
# Step 1: Adjust DATA_DIR to where the data is located
# Step 2: Adjust OUT_DIR to where output folder is
# Step 3: Run entire script
## setup -----------------------------------------------------------------------

# The PanelMatch matching code below is commented out because it was run on a
# high-performance cluster; uncomment it to regenerate the matched sets.


## setup -----------------------------------------------------------------------
# clean slate: remove all objects that ls() lists in the current workspace so
# the script does not pick up leftovers from an earlier session. Attached
# packages and options() are not affected by this call.
rm(list = ls())
# Need to install to read/use PanelEstimate Objects
# devtools::install_github("insongkim/PanelMatch",ref = "big_data_checks")


# ######## SENATE
# # LOAD PACKAGES
# library(PanelMatch)
# library(dplyr)
# options(scipen=999)
# PanelMatch provides get_covariate_balance(); dplyr provides %>%, filter(),
# mutate() and select(), all of which are used further down in this script.
library(PanelMatch)
library(dplyr)
# Penalise scientific notation so numbers are printed in fixed notation.
options(scipen=999)

## Project folders -------------------------------------------------------------
# Point these two folders at the local copy of the replication package.
DATA_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/data"
OUT_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/output"

## Sample definition (Senate) --------------------------------------------------
chamber_sub <- "senate"            # chamber to be analyzed
naics_sub <- as.character(31:33)   # two-digit NAICS codes to be analyzed
path <- file.path(DATA_DIR, "don_lob_all_ind_20210203.rds")  # input panel data
treatment_select <- "don_any_bin"  # treatment kept in specs_df
outcome_select <- "lob_any_bin"    # outcome kept in specs_df

## Matching parameters ---------------------------------------------------------
# Alternative refinement method: "CBPS.weight"
refinement_method <- "mahalanobis"
size_match <- 10                      # size of matched set
lags_match_dv <- "1:3"                # DV lags to match on
covariates_match <- "l_sale + l_emp"  # other covariates to match on
# Alternative exact matching: c("naics3", "govtrack_id")
exact_match <- "govtrack_id"

## Variable names and their display labels -------------------------------------
# treatments / treatname are parallel vectors, as are outcomes / outcome_names.
treatments <- c(
  "don_any_bin",
  "don_pac_bin",
  "don_ceo_bin",
  "don_gov_bin",
  "don_emp_bin"
)
treatname <- c(
  "AnyDon",
  "PacDon",
  "CeoDon",
  "GovDon",
  "EmpDon"
)
outcomes <- c(
  "lob_spon_bin",
  "lob_cosp_only_bin",
  "lob_comm_bin",
  "lob_any_bin"
)
outcome_names <- c(
  "SpLob",
  "CoSpLob",
  "CommLob",
  "AnyLob"
)
chamber_names <- c("Senate", "House")


#### CREATE A DF WITH ANALYSIS PARAMETERS ######
# One row per treatment x chamber x outcome combination. The grid is derived
# from the lengths of the parameter vectors instead of hard-coded repeat counts
# (8 / 10 / 40), so adding or removing a treatment or outcome keeps the table
# consistent. Row order is unchanged: outcomes vary fastest, then chamber, then
# treatment.
chamber_codes <- c("senate", "house") # values of `chamber`, parallel to chamber_names
stopifnot(length(treatments) == length(treatname),
          length(outcomes) == length(outcome_names),
          length(chamber_codes) == length(chamber_names))

specs_df <- with(
  # expand.grid() varies its first factor fastest, which reproduces the
  # outcome-within-chamber-within-treatment ordering described above.
  expand.grid(i_outcome = seq_along(outcomes),
              i_chamber = seq_along(chamber_codes),
              i_treatment = seq_along(treatments)),
  data.frame(chamber = chamber_codes[i_chamber],
             chamber_names = chamber_names[i_chamber],
             treatments = treatments[i_treatment],
             treatname = treatname[i_treatment],
             outcomes = outcomes[i_outcome],
             outcome_names = outcome_names[i_outcome],
             # the remaining settings are scalars; data.frame() recycles them
             refinement_method = refinement_method,
             size_match = size_match,
             exact_match1 = exact_match[1],
             ## exact_match2 = exact_match[2],
             covariate_match = covariates_match,
             lags_match_dv = lags_match_dv,
             stringsAsFactors = FALSE)
)

# Keep only the specification(s) for the chosen chamber, outcome and treatment.
specs_df <- specs_df %>%
  filter(chamber %in% chamber_sub,
         outcomes == outcome_select,
         treatments == treatment_select)


# LOAD DATA, SUBSET, and keep only variables needed
# - rows: industries in naics_sub and chamber(s) in chamber_sub. `%in%` is used
#   for both because each is a selection vector; `==` would recycle silently if
#   more than one chamber were ever listed.
# - l_sale / l_emp: log(x + 1) transforms of sale and emp.
# - gvkey_govtrack_id_numeric: integer code for each gvkey_govtrack_id, used as
#   the unit id in the PanelMatch calls.
# The result is converted to a plain data.frame at the end of the pipeline
# because PanelMatch sometimes breaks on other data-frame classes.
don_lob <- readRDS(path) %>%
  filter(naics2 %in% naics_sub, chamber %in% chamber_sub) %>%
  mutate(l_sale = log(sale + 1), l_emp = log(emp + 1)) %>%
  select(gvkey_govtrack_id, gvkey, govtrack_id, year, naics, naics3, naics2, chamber, # IDs
         lob_any_bin, lob_spon_bin, lob_cosp_only_bin, lob_comm_bin, # outcomes
         don_any_bin, don_pac_bin, don_emp_bin, don_ceo_bin, don_gov_bin, # treatments (outcomes in the reverse analysis)
         l_sale, l_emp) %>%
  mutate(gvkey_govtrack_id_numeric = as.integer(as.factor(gvkey_govtrack_id)),
         year = as.integer(year)) %>%
  as.data.frame()
# 
# # print number of rows
# print(paste0("using dataset with ", nrow(don_lob), " rows."))
# 
# 
# 
# # start loop across all outcomes
# for (i in 1:dim(specs_df)[1]) {
# 
#   # set up the respective outcome names for plot titles
#   outcome_name <- specs_df$outcome_names[i] # change with outcome used
#   chamber_name <- specs_df$chamber_names[i] #
#   treatment_name <- specs_df$treatname[i]
# 
# 
# 
#   covs_formula <- paste0("~ I(lag(", specs_df$outcomes[i], ", ", lags_match_dv, ")) + ", specs_df$covariate_match[i])
# 
# 
# 
#   ##### Run Panelmatch
#   Matches.lobby_norefine <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                                        treatment = specs_df$treatments[i], match.missing = TRUE,
#                                        outcome.var = specs_df$outcomes[i], qoi = "att",
#                                        #covs.formula = as.formula(covs_formula),
#                                        ## exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]),
#                                        exact.match.variables = c(specs_df$exact_match1[i]),
#                                        forbid.treatment.reversal = FALSE,
#                                        restrict.control.period = 3,
#                                        size.match = specs_df$size_match[i],
#                                        data = don_lob,
#                                        refinement.method = "none")
# 
#   Matches.lobby_maha <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                                    treatment = specs_df$treatments[i], match.missing = TRUE,
#                                    outcome.var = specs_df$outcomes[i], qoi = "att",
#                                    covs.formula = as.formula(covs_formula),
#                                    ## exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]),
#                                    exact.match.variables = c(specs_df$exact_match1[i]),
#                                    forbid.treatment.reversal = FALSE,
#                                    restrict.control.period = 3,
#                                    size.match = specs_df$size_match[i],
#                                    data = don_lob,
#                                    refinement.method = "mahalanobis")
# 
#   Matches.lobby_cbps <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                                    treatment = specs_df$treatments[i], match.missing = TRUE,
#                                    outcome.var = specs_df$outcomes[i], qoi = "att",
#                                    covs.formula = as.formula(covs_formula),
#                                    ## exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]),
#                                    exact.match.variables = c(specs_df$exact_match1[i]),
#                                    forbid.treatment.reversal = FALSE,
#                                    restrict.control.period = 3,
#                                    size.match = specs_df$size_match[i],
#                                    data = don_lob,
#                                    refinement.method = "CBPS.weight")
# 
#   save(Matches.lobby_norefine, file = "../data/figI1/msets_ann_sen_norefine")
#   save(Matches.lobby_maha, file = "../data/figI1/msets_ann_sen_maha")
#   save(Matches.lobby_cbps, file = "../data/figI1/msets_ann_sen_cbps")
# }

# # LOAD PACKAGES
library(PanelMatch)
library(dplyr)
options(scipen=999)

# DATA_DIR and OUT_DIR are configured once near the top of the script. Assign
# the defaults here only if they are not defined, so that a user who adjusted
# the folders at the top (as the header instructions say) does not have them
# silently reset to the original author's paths.
if (!exists("DATA_DIR")) {
  DATA_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/data"
}
if (!exists("OUT_DIR")) {
  OUT_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/output"
}


#### PLOT COVARIATE BALANCE PLOTS
# load the matched sets
# Path components are passed to file.path() separately; a leading "/" inside a
# component would produce a doubled separator ("data//figI1/...").
load(file.path(DATA_DIR, "figI1", "msets_ann_sen_norefine"))
load(file.path(DATA_DIR, "figI1", "msets_ann_sen_maha"))
load(file.path(DATA_DIR, "figI1", "msets_ann_sen_cbps"))

# The plotting code below refers to these objects by name, so fail here with a
# clear message if one of the files did not provide it.
stopifnot(exists("Matches.lobby_norefine"),
          exists("Matches.lobby_maha"),
          exists("Matches.lobby_cbps"))


# Covariate balance tables (plot = FALSE) for the three matched-set variants:
#   plotdta1 - no refinement
#   plotdta2 - CBPS weighting
#   plotdta3 - Mahalanobis matching
# The three calls differ only in the matched sets and the title, so they go
# through one temporary wrapper that is removed again afterwards.
senate_balance_table <- function(att_sets, panel_title) {
  get_covariate_balance(
    att_sets,
    data = don_lob,
    main = panel_title,
    ylab = "Standardized Mean\nDifferences for\nTreatment and Covariates",
    xlab = "Years relative to timing of Donation",
    legend = FALSE,
    covariates = c("l_sale", "l_emp", "lob_any_bin"),
    plot = FALSE,
    ylim = c(-1, 3)
  )
}

plotdta1 <- senate_balance_table(Matches.lobby_norefine$att, "Before\nRefinement")
plotdta2 <- senate_balance_table(Matches.lobby_cbps$att, "CBPS")
plotdta3 <- senate_balance_table(Matches.lobby_maha$att, "Maha")

rm(senate_balance_table)



# Figure I.1a: three side-by-side covariate balance panels for the Senate
# (before refinement, CBPS weighting, Mahalanobis matching), written to OUT_DIR.
pdf(file = file.path(OUT_DIR, "figure_i1a.pdf"), # covbal_sen_norefine_cbps_maha.pdf
    height = 7, width = 18)

# All drawing happens inside tryCatch() so that dev.off() runs even if one of
# the plotting calls fails; otherwise the PDF device would stay open and the
# output file would be left incomplete.
invisible(tryCatch({

  ## Plotting Parameters
  # one row of three panels with enlarged labels and wide left/top margins
  # NOTE(review): font.lab is documented as an integer font code; 1.8 is kept
  # as in the original -- confirm whether cex-style scaling was intended.
  par(font.lab = 1.8, cex.lab = 2.5, cex.axis = 2,
      cex.main = 2.75, mfrow = c(1,3), mar = c(5,9,9,2))


  # Panel 1: matched sets without refinement (Senate, AnyLob ~ AnyDon)
  get_covariate_balance(Matches.lobby_norefine$att,
                        data = don_lob,
                        main = "Before\nRefinement",
                        ylab = "Standardized Mean Differences for\nTreatment and Covariates",
                        xlab = "Years relative to timing of Donation",
                        legend = FALSE,
                        covariates = c("l_sale", "l_emp", "lob_any_bin"),
                        plot = TRUE,
                        ylim = c(-1, 3.5))

  # overlay plot: redraw the three balance series in greyscale
  lines(plotdta1[,1], col = "grey80", lwd = 2)
  lines(plotdta1[,2], col = "grey55", lwd = 2)
  lines(plotdta1[,3], col = "black", lwd = 4)

  # Legend (drawn once, in the first panel)
  legend("topleft", legend=c("Sales", "Employees", "Lobbying"),
         col=c("grey80", "grey55", "black"), bty="n",
         lty=c(1, 1, 1), cex=2)

  # Panel 2: CBPS-weighted matched sets
  get_covariate_balance(Matches.lobby_cbps$att,
                        data = don_lob, main = "Covariate Balancing\nPropensity Score",
                        covariates = c("l_sale", "l_emp", "lob_any_bin"),
                        ylab = "",
                        xlab = "Years relative to timing of Donation",
                        legend = FALSE,
                        plot = TRUE,
                        ylim = c(-1, 3.5))

  # overlay plot
  lines(plotdta2[,1], col = "grey80", lwd = 2)
  lines(plotdta2[,2], col = "grey55", lwd = 2)
  lines(plotdta2[,3], col = "black", lwd = 4)

  # Panel 3: Mahalanobis-refined matched sets
  get_covariate_balance(Matches.lobby_maha$att,
                        data = don_lob, main = "Mahalanobis Distance\nMatching",
                        ylab = "",
                        xlab = "Years relative to timing of Donation",
                        legend = FALSE,
                        covariates = c("l_sale", "l_emp", "lob_any_bin"),
                        plot = TRUE,
                        ylim = c(-1, 3.5))

  # overlay plot
  lines(plotdta3[,1], col = "grey80", lwd = 2)
  lines(plotdta3[,2], col = "grey55", lwd = 2)
  lines(plotdta3[,3], col = "black", lwd = 4)

}, finally = dev.off()))




# ######## HOUSE 
# # LOAD PACKAGES
# library(PanelMatch)
# library(dplyr)
# options(scipen=999)
# 
### SUBSETTING AND PATH
naics_sub <- c("31", "32", "33") #### <- set industries to be analyzed
chamber_sub <- c("house") #### <- set chamber to be analyzed
# Resolve the input file against DATA_DIR, exactly as the Senate section does.
# A path relative to the working directory ("../data/...") only works when the
# script is run from one particular folder.
path <- file.path(DATA_DIR, "don_lob_all_ind_20210203.rds") ### set path to be analyzed
outcome_select <- "lob_any_bin"     # outcome kept in specs_df
treatment_select <- "don_any_bin"   # treatment kept in specs_df

#### SET ANALYSIS PARAMETERS ######
## refinement_method <- "CBPS.weight" # set refinement
refinement_method <- "mahalanobis" # set refinement
size_match <- 10 # size of matched set
lags_match_dv <- "1:3" # DV lags to match on
covariates_match <- "l_sale + l_emp" # other covariates to match on
## exact_match <- c("naics3", "govtrack_id") # exact matching
exact_match <- c("govtrack_id") # exact matching
# treatments / treatname and outcomes / outcome_names are parallel vectors:
# variable names and the short labels used for them.
treatments <- c("don_any_bin", "don_pac_bin", "don_ceo_bin", "don_gov_bin", "don_emp_bin") # treatment variables
treatname <- c("AnyDon", "PacDon", "CeoDon", "GovDon", "EmpDon") # treatment labels
outcomes <- c("lob_spon_bin", "lob_cosp_only_bin", "lob_comm_bin", "lob_any_bin")
outcome_names <- c("SpLob", "CoSpLob", "CommLob", "AnyLob")
chamber_names <- c("Senate", "House")


#### CREATE A DF WITH ANALYSIS PARAMETERS ######
# One row per treatment x chamber x outcome combination. The grid is derived
# from the lengths of the parameter vectors instead of hard-coded repeat counts
# (8 / 10 / 40), so adding or removing a treatment or outcome keeps the table
# consistent. Row order is unchanged: outcomes vary fastest, then chamber, then
# treatment.
chamber_codes <- c("senate", "house") # values of `chamber`, parallel to chamber_names
stopifnot(length(treatments) == length(treatname),
          length(outcomes) == length(outcome_names),
          length(chamber_codes) == length(chamber_names))

specs_df <- with(
  # expand.grid() varies its first factor fastest, which reproduces the
  # outcome-within-chamber-within-treatment ordering described above.
  expand.grid(i_outcome = seq_along(outcomes),
              i_chamber = seq_along(chamber_codes),
              i_treatment = seq_along(treatments)),
  data.frame(chamber = chamber_codes[i_chamber],
             chamber_names = chamber_names[i_chamber],
             treatments = treatments[i_treatment],
             treatname = treatname[i_treatment],
             outcomes = outcomes[i_outcome],
             outcome_names = outcome_names[i_outcome],
             # the remaining settings are scalars; data.frame() recycles them
             refinement_method = refinement_method,
             size_match = size_match,
             exact_match1 = exact_match[1],
             ## exact_match2 = exact_match[2],
             covariate_match = covariates_match,
             lags_match_dv = lags_match_dv,
             stringsAsFactors = FALSE)
)

# Keep only the specification(s) for the chosen chamber, outcome and treatment.
specs_df <- specs_df %>%
  filter(chamber %in% chamber_sub,
         outcomes == outcome_select,
         treatments == treatment_select)


# LOAD DATA, SUBSET, and keep only variables needed
# - rows: industries in naics_sub and chamber(s) in chamber_sub. `%in%` is used
#   for both because each is a selection vector; `==` would recycle silently if
#   more than one chamber were ever listed.
# - l_sale / l_emp: log(x + 1) transforms of sale and emp.
# - gvkey_govtrack_id_numeric: integer code for each gvkey_govtrack_id, used as
#   the unit id in the PanelMatch calls.
# The result is converted to a plain data.frame at the end of the pipeline
# because PanelMatch sometimes breaks on other data-frame classes.
don_lob <- readRDS(path) %>%
  filter(naics2 %in% naics_sub, chamber %in% chamber_sub) %>%
  mutate(l_sale = log(sale + 1), l_emp = log(emp + 1)) %>%
  select(gvkey_govtrack_id, gvkey, govtrack_id, year, naics, naics3, naics2, chamber, # IDs
         lob_any_bin, lob_spon_bin, lob_cosp_only_bin, lob_comm_bin, # outcomes
         don_any_bin, don_pac_bin, don_emp_bin, don_ceo_bin, don_gov_bin, # treatments (outcomes in the reverse analysis)
         l_sale, l_emp) %>%
  mutate(gvkey_govtrack_id_numeric = as.integer(as.factor(gvkey_govtrack_id)),
         year = as.integer(year)) %>%
  as.data.frame()
# 
# # print number of rows
# print(paste0("using dataset with ", nrow(don_lob), " rows."))
# 
# 
# 
# # start loop across all outcomes
# for (i in 1:dim(specs_df)[1]) {
#   
#   # set up the respective outcome names for plot titles
#   outcome_name <- specs_df$outcome_names[i] # change with outcome used
#   chamber_name <- specs_df$chamber_names[i] #
#   treatment_name <- specs_df$treatname[i]
#   
#   
#   
#   covs_formula <- paste0("~ I(lag(", specs_df$outcomes[i], ", ", lags_match_dv, ")) + ", specs_df$covariate_match[i])
#   
#   
#   
#   ##### Run Panelmatch
#   Matches.lobby_house_norefine <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                                              treatment = specs_df$treatments[i], match.missing = TRUE,
#                                              outcome.var = specs_df$outcomes[i], qoi = "att",
#                                              #covs.formula = as.formula(covs_formula),
#                                              ## exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]),
#                                              exact.match.variables = c(specs_df$exact_match1[i]),
#                                              forbid.treatment.reversal = FALSE,
#                                              restrict.control.period = 3,
#                                              size.match = specs_df$size_match[i],
#                                              data = don_lob,
#                                              refinement.method = "none")
#   
#   Matches.lobby_house_maha <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                                          treatment = specs_df$treatments[i], match.missing = TRUE,
#                                          outcome.var = specs_df$outcomes[i], qoi = "att",
#                                          covs.formula = as.formula(covs_formula),
#                                          ## exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]),
#                                          exact.match.variables = c(specs_df$exact_match1[i]),
#                                          forbid.treatment.reversal = FALSE,
#                                          restrict.control.period = 3,
#                                          size.match = specs_df$size_match[i],
#                                          data = don_lob,
#                                          refinement.method = "mahalanobis")
#   
#   Matches.lobby_house_cbps <- PanelMatch(lag = 3, lead = 0:3, time.id = "year", unit.id = "gvkey_govtrack_id_numeric",
#                                          treatment = specs_df$treatments[i], match.missing = TRUE,
#                                          outcome.var = specs_df$outcomes[i], qoi = "att",
#                                          covs.formula = as.formula(covs_formula),
#                                          ## exact.match.variables = c(specs_df$exact_match1[i], specs_df$exact_match2[i]),
#                                          exact.match.variables = c(specs_df$exact_match1[i]),
#                                          forbid.treatment.reversal = FALSE,
#                                          restrict.control.period = 3,
#                                          size.match = specs_df$size_match[i],
#                                          data = don_lob,
#                                          refinement.method = "CBPS.weight")
#   
#   
#   save(Matches.lobby_house_norefine, file = "../data/figI1/msets_ann_house_norefine")
#   save(Matches.lobby_house_maha, file = "../data/figI1/msets_ann_house_maha")
#   save(Matches.lobby_house_cbps, file = "../data/figI1/msets_ann_house_cbps")
#   
# }

# DATA_DIR and OUT_DIR are configured once near the top of the script. Assign
# the defaults here only if they are not defined, so that a user who adjusted
# the folders at the top (as the header instructions say) does not have them
# silently reset to the original author's paths.
if (!exists("DATA_DIR")) {
  DATA_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/data"
}
if (!exists("OUT_DIR")) {
  OUT_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package/output"
}


#### PLOT COVARIATE BALANCE PLOTS
# load the matched sets
# Each file is loaded into its own scratch environment and the matched-set
# object is picked out by name. This avoids depending on a global called
# `Matches.lobby`: if a file stores its object under a different name (the
# generating code in this script saves Matches.lobby_house_maha /
# Matches.lobby_house_cbps), we get a clear error or the right object instead
# of a missing or stale `Matches.lobby`.
#
# file_name    - file inside DATA_DIR/figI1
# object_names - acceptable object names, in order of preference
# Returns the first of object_names found in the file; stops if none is there.
read_house_msets <- function(file_name, object_names) {
  scratch <- new.env()
  loaded <- load(file.path(DATA_DIR, "figI1", file_name), envir = scratch)
  found <- intersect(object_names, loaded)
  if (length(found) == 0L) {
    stop("File '", file_name, "' contains: ", paste(loaded, collapse = ", "),
         "; expected one of: ", paste(object_names, collapse = ", "),
         call. = FALSE)
  }
  get(found[1], envir = scratch, inherits = FALSE)
}

# The House objects are stored under the same names as the Senate ones, so the
# plotting code is the same for both chambers.
Matches.lobby_norefine <- read_house_msets("msets_ann_house_norefine",
                                           c("Matches.lobby_house_norefine"))
Matches.lobby_maha <- read_house_msets("msets_ann_house_maha",
                                       c("Matches.lobby", "Matches.lobby_house_maha"))
Matches.lobby_cbps <- read_house_msets("msets_ann_house_cbps",
                                       c("Matches.lobby", "Matches.lobby_house_cbps"))
rm(read_house_msets)


# Covariate balance tables (plot = FALSE) for the three matched-set variants:
#   plotdta1 - no refinement
#   plotdta2 - CBPS weighting
#   plotdta3 - Mahalanobis matching
# The three calls differ only in the matched sets and the title, so they go
# through one temporary wrapper that is removed again afterwards.
house_balance_table <- function(att_sets, panel_title) {
  get_covariate_balance(
    att_sets,
    data = don_lob,
    main = panel_title,
    ylab = "Standardized Mean\nDifferences for\nTreatment and Covariates",
    xlab = "Years relative to timing of Donation",
    legend = FALSE,
    covariates = c("l_sale", "l_emp", "lob_any_bin"),
    plot = FALSE,
    ylim = c(-1, 3)
  )
}

plotdta1 <- house_balance_table(Matches.lobby_norefine$att, "Before\nRefinement")
plotdta2 <- house_balance_table(Matches.lobby_cbps$att, "CBPS")
plotdta3 <- house_balance_table(Matches.lobby_maha$att, "Maha")

rm(house_balance_table)



# Figure I.1b: three side-by-side covariate balance panels for the House
# (before refinement, CBPS weighting, Mahalanobis matching), written to OUT_DIR.
pdf(file = file.path(OUT_DIR, "figure_i1b.pdf"), # covbal_house_norefine_cbps_maha.pdf
    height = 7, width = 18)

# All drawing happens inside tryCatch() so that dev.off() runs even if one of
# the plotting calls fails; otherwise the PDF device would stay open and the
# output file would be left incomplete.
invisible(tryCatch({

  ## Plotting Parameters
  # one row of three panels with enlarged labels and wide left/top margins
  # NOTE(review): font.lab is documented as an integer font code; 1.8 is kept
  # as in the original -- confirm whether cex-style scaling was intended.
  par(font.lab = 1.8, cex.lab = 2.5, cex.axis = 2,
      cex.main = 2.75, mfrow = c(1,3), mar = c(5,9,9,2))


  # Panel 1: matched sets without refinement (House, AnyLob ~ AnyDon)
  get_covariate_balance(Matches.lobby_norefine$att,
                        data = don_lob,
                        main = "Before\nRefinement",
                        ylab = "Standardized Mean Differences for\nTreatment and Covariates",
                        xlab = "Years relative to timing of Donation",
                        legend = FALSE,
                        covariates = c("l_sale", "l_emp", "lob_any_bin"),
                        plot = TRUE,
                        ylim = c(-1, 3.5))

  # overlay plot: redraw the three balance series in greyscale
  lines(plotdta1[,1], col = "grey80", lwd = 2)
  lines(plotdta1[,2], col = "grey55", lwd = 2)
  lines(plotdta1[,3], col = "black", lwd = 4)

  # Legend (drawn once, in the first panel)
  legend("topleft", legend=c("Sales", "Employees", "Lobbying"),
         col=c("grey80", "grey55", "black"), bty="n",
         lty=c(1, 1, 1), cex=2)

  # Panel 2: CBPS-weighted matched sets
  get_covariate_balance(Matches.lobby_cbps$att,
                        data = don_lob, main = "Covariate Balancing\nPropensity Score",
                        covariates = c("l_sale", "l_emp", "lob_any_bin"),
                        ylab = "",
                        xlab = "Years relative to timing of Donation",
                        legend = FALSE,
                        plot = TRUE,
                        ylim = c(-1, 3.5))

  # overlay plot
  lines(plotdta2[,1], col = "grey80", lwd = 2)
  lines(plotdta2[,2], col = "grey55", lwd = 2)
  lines(plotdta2[,3], col = "black", lwd = 4)

  # Panel 3: Mahalanobis-refined matched sets
  get_covariate_balance(Matches.lobby_maha$att,
                        data = don_lob, main = "Mahalanobis Distance\nMatching",
                        ylab = "",
                        xlab = "Years relative to timing of Donation",
                        legend = FALSE,
                        covariates = c("l_sale", "l_emp", "lob_any_bin"),
                        plot = TRUE,
                        ylim = c(-1, 3.5))

  # overlay plot
  lines(plotdta3[,1], col = "grey80", lwd = 2)
  lines(plotdta3[,2], col = "grey55", lwd = 2)
  lines(plotdta3[,3], col = "black", lwd = 4)

}, finally = dev.off()))


